Every year, well over 300,000 individuals are arrested in Virginia for offenses big and small. What happens next can vary widely. Most are released quickly on their own recognizance or with a bond posted to ensure that they return for their court date, but others face pretrial detention, sometimes all the way to trial even before being convicted of a crime.
This analysis will look at the role that race and geography (i.e. judicial circuit) play in determining decisions about pretrial release. While controlling for factors that contribute to release conditions, we will seek to detect whether racial disparities are evident in the available data concerning pretrial release and whether those disparities vary across judicial circuits. Using partial pooling and hierarchical modeling, we intend to evaluate posterior distributions on race and geography as predictors of pretrial treatment of individuals, and thereby quantify the uncertainty in the model.
The data set contains approximately 22,000 de-identified individuals with contact events with the criminal justice system in October of 2017. The data contains over 700 variables, including demographic information about the defendants as well as their criminal history, locality, offense, lawyer type (e.g. public defender or private attorney), and their pretrial outcomes. We have reduced these to a manageable set of predictors and the binary response variable of whether a defendant was held for their entire pretrial period.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import arviz as az
#initial paring down of full dataset
# df=pd.read_csv("http://www.vcsc.virginia.gov/pretrialdataproject/October%202017%20Cohort_Virginia%20Pretrial%20Data%20Project_Deidentified%20FINAL%20Update_10272021.csv", low_memory=False)
"""
cols=df.columns
selected_cols=[item[1] for item in
[(2, 'Defendant_Sex'),
(3, 'Defendant_Race'),
(5, 'Defendant_Age'),
(9, 'Defendant_IndigencyStatus'),
(12, 'WhetherDefendantWasReleasedPretrial'),
(14, 'DaysBetweenContactEventAndPretrialRelease'),
(16, 'PretrialReleaseType2'),
(17, 'BondTypeAtInitialContact'),
(18, 'BondAmountAtInitialContact'),
(19, 'BondTypeAtRelease_v1'),
(21, 'BondAmountAtRelease'),
(25, 'Indicator_PresumptiveDenialOfBail_19.2_120'),
(168, 'PriorArrests'),
(301, 'Locality_JudicialCircuit'),
(303, 'Locality_MagisterialRegion'),
(563, 'FollowUp_ChargedWithNewFTA'),
(572, 'FollowUp_ArrestedforNewOff'),
(708, 'CrimeCommission2021ReportClassificationofDefendants')]]+['VPRAI_TotalPoints_Opt1',
'VPRAI_TotalPoints_Opt2',
'PSA_FTA_TotalPoints',
'PSA_NCA_TotalPoints',
'PSA_NVCA_TotalPoints']
"""
"\ncols=df.columns\nselected_cols=[item[1] for item in\n [(2, 'Defendant_Sex'),\n (3, 'Defendant_Race'),\n (5, 'Defendant_Age'),\n (9, 'Defendant_IndigencyStatus'),\n (12, 'WhetherDefendantWasReleasedPretrial'),\n (14, 'DaysBetweenContactEventAndPretrialRelease'),\n (16, 'PretrialReleaseType2'),\n (17, 'BondTypeAtInitialContact'),\n (18, 'BondAmountAtInitialContact'),\n (19, 'BondTypeAtRelease_v1'),\n (21, 'BondAmountAtRelease'),\n (25, 'Indicator_PresumptiveDenialOfBail_19.2_120'),\n (168, 'PriorArrests'),\n (301, 'Locality_JudicialCircuit'),\n (303, 'Locality_MagisterialRegion'),\n (563, 'FollowUp_ChargedWithNewFTA'),\n (572, 'FollowUp_ArrestedforNewOff'),\n (708, 'CrimeCommission2021ReportClassificationofDefendants')]]+['VPRAI_TotalPoints_Opt1',\n 'VPRAI_TotalPoints_Opt2',\n 'PSA_FTA_TotalPoints',\n 'PSA_NCA_TotalPoints',\n 'PSA_NVCA_TotalPoints']\n "
# df[selected_cols].to_csv("SelectPretrialData.csv", index=False)
# Load the pared-down extract produced by the commented-out selection code above.
df=pd.read_csv("SelectPretrialData.csv")
The overwhelming majority of defendants are identified as white or black, so we will focus just on these.
Pretrial release is coded 0, 1, or 9 for unclear. There are only 30 unknowns, or 0.13%, which won't affect the analysis, so these are dropped.
13 records do not include a Judicial Circuit so these are dropped.
# Restrict to the analysis columns and apply the row filters described above:
# keep only White/Black defendants, drop blank judicial circuits, and drop
# the handful of records whose release status is coded 9 (unknown).
cols = ['Defendant_Race', 'WhetherDefendantWasReleasedPretrial', 'Locality_JudicialCircuit']
minimal_df = df[cols].query(
    "(Defendant_Race in ['W', 'B'])"
    " and (Locality_JudicialCircuit != ' ')"
    " and (WhetherDefendantWasReleasedPretrial != 9)"
)
#base rate of pre-trial release for all defendants across Virginia
release_rates = (
    minimal_df['WhetherDefendantWasReleasedPretrial']
    .replace({1: "Released", 0: "Held Pretrial"})
    .value_counts(normalize=True)
)
ax = release_rates.plot(kind='bar')
# Label each bar with its proportion, rounded to two decimals.
for patch in ax.patches:
    ax.annotate(str(round(patch.get_height(), 2)),
                (patch.get_x() * 1.05, patch.get_height() * 1.005))
plt.title("Proportion of defendants released pretrial")
plt.xticks(rotation=0)
plt.savefig("Proportion of defendants released pretrial.png")
plt.show()
# Cross-tabulate race against 1 - release flag (so column 0 = released,
# column 1 = held), normalized within each race.
bw = pd.crosstab(minimal_df['Defendant_Race'],
                 1 - minimal_df['WhetherDefendantWasReleasedPretrial'],
                 normalize='index')
bw
| WhetherDefendantWasReleasedPretrial | 0 | 1 |
|---|---|---|
| Defendant_Race | ||
| B | 0.811538 | 0.188462 |
| W | 0.846265 | 0.153735 |
# Stacked bar of release vs. detention by race; annotate only the larger
# (release) segment of each bar with its proportion.
ax = bw.plot.bar(stacked=True)
for patch in ax.patches:
    height = patch.get_height()
    if height > .8:
        ax.annotate(str(round(height, 2)), (patch.get_x() + .2, height * 1.005))
plt.legend(['Released', 'Held Pretrial'], loc='lower left')
plt.title("White defendants are released at a higher rate than Black defendants")
plt.savefig("White defendants released.png");
# Mean release rate per (circuit, race) pair, pivoted so each circuit is a
# row with one column per race; Difference is the White-minus-Black gap in
# percentage points.
dist_bw = (minimal_df
           .groupby(["Locality_JudicialCircuit", 'Defendant_Race'])
           .agg({'WhetherDefendantWasReleasedPretrial': 'mean'})
           .reset_index())
dist_bw_pivot = dist_bw.pivot_table(columns=['Defendant_Race'],
                                    index=['Locality_JudicialCircuit'])
dist_bw_pivot['Difference'] = 100 * (
    dist_bw_pivot['WhetherDefendantWasReleasedPretrial']['W']
    - dist_bw_pivot['WhetherDefendantWasReleasedPretrial']['B'])
fig, ax = plt.subplots(figsize=(10, 10))
plt.stem(dist_bw_pivot.index, dist_bw_pivot['Difference'])
plt.xticks(np.arange(1, 32), rotation=70)
plt.ylabel("Difference in release rates")
plt.xlabel("Judicial District")
plt.suptitle("White Defendants are released at a higher rate in 24 of 31 Judicial Districts")
plt.title("Stems above the line indicate a higher rate of release for White defendants")
plt.savefig("District differences.png");
So just looking at proportions we have an intuition that there may be variation in treatment of defendants based on race in the overall population, and that we may have some districts where that difference is more pronounced.
To investigate this phenomenon we will apply a hierarchical model to the data.
import pymc as pm
import arviz as az
az.style.use('arviz-darkgrid')
SEED = 20190518 # from random.org, for reproducibility
np.random.seed(SEED)
import scipy as sp
# 0/1 indicator: 1 if the defendant is identified as Black, else 0.
minimal_df['Indicator_Black']=(minimal_df['Defendant_Race']=='B').astype(int)
jud_districts = minimal_df.Locality_JudicialCircuit.unique()
# Number of distinct judicial circuits (31).
districts = len(jud_districts)
# Zero-based circuit index for each row. Circuit labels come out of the CSV
# as strings, so convert via float then int, and shift the 1-based circuit
# numbers down to 0-based indices for array indexing.
# NOTE(review): assumes every circuit label is numeric 1..31 -- confirm.
district = minimal_df['Locality_JudicialCircuit'].values.astype(float).astype(int)-1
# Sanity check: indices should cover 0..30 with no gaps (see output below).
np.unique(district)
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30])
# Partial-pooling (random-intercept) model: one intercept per judicial
# circuit, shrunk toward a shared grand mean.
with pm.Model() as partial_pooling:
    # Priors
    # Hyperpriors: a very diffuse Normal for the grand-mean release rate and
    # a HalfCauchy for the between-circuit spread.
    mu_a = pm.Normal('mu_a', mu=0., sigma=1e5)
    sigma_a = pm.HalfCauchy('sigma_a', 5)
    # Random intercepts
    a = pm.Normal('a', mu=mu_a, sigma=sigma_a, shape=districts)
    # Model error
    sigma_y = pm.HalfCauchy('sigma_y',5)
    # Expected value
    y_hat = a[district]
    # Data likelihood
    # NOTE(review): the response is binary (released 0/1) but is modeled with
    # a Normal likelihood, i.e. a linear probability model. A Bernoulli
    # likelihood with a logit link would be the more conventional choice;
    # kept as-is to preserve the reported results.
    y_like = pm.Normal('y_like', mu=y_hat, sigma=sigma_y, observed=minimal_df['WhetherDefendantWasReleasedPretrial'])
pm.model_to_graphviz(partial_pooling)
# Fit the partial-pooling model with mean-field ADVI (variational inference)
# rather than MCMC; 40,000 optimization iterations.
with partial_pooling:
    advi_part_pool=pm.fit(40000, method = 'advi', random_seed = SEED)
Finished [100%]: Average Loss = 9,725.4
# Plot the negated ADVI loss history: the ELBO should rise and then plateau
# as the variational approximation converges.
elbo_history = pd.DataFrame({
    'ELBO': -advi_part_pool.hist,
    'n': np.arange(len(advi_part_pool.hist)),
})
_ = sns.lineplot(y='ELBO', x='n', data=elbo_history)
The ELBO curve rises quickly and then plateaus, indicating that the ADVI approximation has converged.
# Posterior summary from a draw of the fitted variational approximation.
# (ArviZ warns and r_hat is NaN because ADVI yields a single "chain".)
az.summary(advi_part_pool.sample(), round_to=2)
arviz - WARNING - Shape validation failed: input_shape: (1, 500), minimum_shape: (chains=2, draws=4)
| mean | sd | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| mu_a | 0.83 | 0.01 | 0.81 | 0.84 | 0.0 | 0.0 | 595.09 | 535.73 | NaN |
| a[0] | 0.77 | 0.02 | 0.74 | 0.80 | 0.0 | 0.0 | 484.71 | 474.04 | NaN |
| a[1] | 0.82 | 0.01 | 0.80 | 0.84 | 0.0 | 0.0 | 564.98 | 512.95 | NaN |
| a[2] | 0.80 | 0.02 | 0.77 | 0.84 | 0.0 | 0.0 | 516.49 | 498.67 | NaN |
| a[3] | 0.82 | 0.01 | 0.80 | 0.84 | 0.0 | 0.0 | 396.68 | 317.58 | NaN |
| a[4] | 0.83 | 0.02 | 0.79 | 0.86 | 0.0 | 0.0 | 456.14 | 504.56 | NaN |
| a[5] | 0.80 | 0.02 | 0.77 | 0.83 | 0.0 | 0.0 | 562.35 | 558.68 | NaN |
| a[6] | 0.85 | 0.02 | 0.82 | 0.88 | 0.0 | 0.0 | 437.14 | 438.47 | NaN |
| a[7] | 0.84 | 0.02 | 0.81 | 0.88 | 0.0 | 0.0 | 429.89 | 542.44 | NaN |
| a[8] | 0.84 | 0.02 | 0.81 | 0.87 | 0.0 | 0.0 | 518.96 | 407.46 | NaN |
| a[9] | 0.81 | 0.02 | 0.78 | 0.84 | 0.0 | 0.0 | 491.43 | 555.18 | NaN |
| a[10] | 0.81 | 0.02 | 0.78 | 0.84 | 0.0 | 0.0 | 334.00 | 425.00 | NaN |
| a[11] | 0.82 | 0.01 | 0.80 | 0.85 | 0.0 | 0.0 | 439.21 | 438.47 | NaN |
| a[12] | 0.80 | 0.01 | 0.78 | 0.82 | 0.0 | 0.0 | 570.34 | 510.31 | NaN |
| a[13] | 0.80 | 0.01 | 0.78 | 0.83 | 0.0 | 0.0 | 530.56 | 424.32 | NaN |
| a[14] | 0.80 | 0.01 | 0.78 | 0.82 | 0.0 | 0.0 | 417.19 | 384.56 | NaN |
| a[15] | 0.86 | 0.01 | 0.83 | 0.88 | 0.0 | 0.0 | 350.51 | 501.06 | NaN |
| a[16] | 0.83 | 0.02 | 0.79 | 0.86 | 0.0 | 0.0 | 515.50 | 393.51 | NaN |
| a[17] | 0.84 | 0.02 | 0.81 | 0.88 | 0.0 | 0.0 | 396.42 | 350.13 | NaN |
| a[18] | 0.92 | 0.01 | 0.90 | 0.94 | 0.0 | 0.0 | 470.65 | 458.50 | NaN |
| a[19] | 0.88 | 0.01 | 0.86 | 0.91 | 0.0 | 0.0 | 407.33 | 463.33 | NaN |
| a[20] | 0.78 | 0.02 | 0.74 | 0.82 | 0.0 | 0.0 | 413.31 | 555.18 | NaN |
| a[21] | 0.82 | 0.01 | 0.79 | 0.85 | 0.0 | 0.0 | 464.97 | 408.34 | NaN |
| a[22] | 0.83 | 0.01 | 0.80 | 0.85 | 0.0 | 0.0 | 464.00 | 438.95 | NaN |
| a[23] | 0.84 | 0.02 | 0.81 | 0.87 | 0.0 | 0.0 | 425.82 | 423.87 | NaN |
| a[24] | 0.85 | 0.01 | 0.83 | 0.88 | 0.0 | 0.0 | 405.93 | 416.00 | NaN |
| a[25] | 0.81 | 0.01 | 0.79 | 0.84 | 0.0 | 0.0 | 564.01 | 472.24 | NaN |
| a[26] | 0.85 | 0.01 | 0.83 | 0.88 | 0.0 | 0.0 | 505.30 | 450.03 | NaN |
| a[27] | 0.81 | 0.02 | 0.78 | 0.84 | 0.0 | 0.0 | 550.73 | 461.64 | NaN |
| a[28] | 0.79 | 0.02 | 0.75 | 0.82 | 0.0 | 0.0 | 501.34 | 463.33 | NaN |
| a[29] | 0.80 | 0.02 | 0.77 | 0.84 | 0.0 | 0.0 | 486.76 | 473.41 | NaN |
| a[30] | 0.88 | 0.01 | 0.85 | 0.90 | 0.0 | 0.0 | 481.79 | 421.32 | NaN |
| sigma_a | 0.04 | 0.01 | 0.03 | 0.05 | 0.0 | 0.0 | 459.43 | 461.64 | NaN |
| sigma_y | 0.37 | 0.00 | 0.37 | 0.38 | 0.0 | 0.0 | 500.34 | 473.99 | NaN |
az.plot_forest(advi_part_pool.sample());
az.plot_trace(advi_part_pool.sample(10000));
# Posterior draws of the per-circuit intercepts: shape (chain, draw, circuit).
sample_trace = advi_part_pool.sample().posterior['a']
# NOTE(review): this rebinds `districts` (previously the circuit count) to
# the trailing dimension of the trace -- same value, but a confusing reuse.
_, sample, districts=sample_trace.shape
# NOTE(review): `jitter` is generated but the loop below plots at the bare
# index `n`; the next cell's zip also consumes it without using it.
jitter = np.random.normal(scale=0.01, size=districts)
# Posterior mean and sd per circuit (single ADVI "chain", hence index 0).
means = sample_trace.mean(axis=1)[0,:]
sd = sample_trace.std(axis=1)[0,:]
fig, ax=plt.subplots()
ax.scatter(np.arange(31), means)
#ax.set_xscale('log')
ax.set_xlim(-0.9,32)
ax.set_ylim(0.7, 0.95)
# Dashed reference line at the overall posterior-mean release rate.
ax.hlines(sample_trace.mean(), -0.9, 32, linestyles='--')
ax.set_ylabel("Proportion of pretrial release")
ax.set_xlabel("Judicial District")
plt.suptitle("Estimated chance of pretrial release by judicial district, using partial pooling")
# Vertical bar of +/- one posterior sd around each circuit's mean.
for j,n,m,s in zip(jitter, np.arange(31), means, sd):
    ax.plot([n]*2, [m-s, m+s], 'b-')
# Frequentist comparison: raw per-circuit release rates and sample standard
# deviations, with no pooling toward the grand mean.
dist_only=minimal_df.groupby('Locality_JudicialCircuit').agg({'WhetherDefendantWasReleasedPretrial':'describe'})
mean=minimal_df['WhetherDefendantWasReleasedPretrial'].mean()
means=dist_only['WhetherDefendantWasReleasedPretrial']['mean'].values
sd=dist_only['WhetherDefendantWasReleasedPretrial']['std'].values
fig, ax=plt.subplots()
ax.scatter(np.arange(31), means)
#ax.set_xscale('log')
ax.set_xlim(-0.9,32)
# Much wider y-range than the partially pooled plot: raw sds are large
# because each circuit's rate is estimated from its own data alone.
ax.set_ylim(0.3, 1.3)
# Reference line reuses the posterior mean from the pooled model above.
ax.hlines(sample_trace.mean(), -0.9, 32, linestyles='--')
ax.set_ylabel("Pretrial release rate")
ax.set_xlabel("Judicial District")
plt.title("Proportion of pretrial release by judicial district")
plt.suptitle("Frequentist Version")
for j,n,m,s in zip(jitter, np.arange(31), means, sd):
    ax.plot([n]*2, [m-s, m+s], 'b-')
# Varying-intercept, varying-slope model: each circuit gets a baseline
# release rate (intercept) and a race effect (slope on Indicator_Black).
with pm.Model() as varying_intercept_slope:
    # Priors
    # Hyperpriors for the circuit-level intercepts and race slopes.
    mu_a = pm.Normal('mu_a', mu=0., sigma=1e5)
    sigma_a = pm.Exponential("sigma_a", 0.5)
    mu_b = pm.Normal('mu_b', mu=0., sigma=1e5)
    sigma_b = pm.HalfCauchy('sigma_b', 5)
    #sigma_b = pm.Exponential("sigma_b", .05)
    # Random intercepts
    a = pm.Normal('a', mu=mu_a, sigma=sigma_a, shape=districts)
    # Random slopes
    b = pm.Normal('b', mu=mu_b, sigma=sigma_b, shape=districts)
    # Model error
    sigma_y = pm.Uniform('sigma_y', lower=0, upper=100)
    # Expected value
    # Per-row expectation: circuit intercept plus the circuit's race slope
    # when the defendant is Black.
    y_hat = a[district] + b[district] * minimal_df['Indicator_Black'].values
    # Data likelihood
    # NOTE(review): Normal likelihood on a 0/1 response (linear probability
    # model); a Bernoulli/logit likelihood would be more conventional.
    y_like = pm.Normal('y_like', mu=y_hat, sigma=sigma_y, observed=minimal_df['WhetherDefendantWasReleasedPretrial'].values)
    step = pm.NUTS(target_accept=0.9)
    varying_intercept_slope_trace=pm.sample(8000, step = step, random_seed = SEED)
pm.model_to_graphviz(varying_intercept_slope)
az.plot_forest(varying_intercept_slope_trace);
with varying_intercept_slope:
    pm.plot_trace(varying_intercept_slope_trace)
# Display the full InferenceData object (posterior, log-likelihood,
# sample stats, observed data -- see output below).
varying_intercept_slope_trace
<xarray.Dataset>
Dimensions: (chain: 4, draw: 8000, a_dim_0: 31, b_dim_0: 31)
Coordinates:
* chain (chain) int64 0 1 2 3
* draw (draw) int64 0 1 2 3 4 5 6 7 ... 7993 7994 7995 7996 7997 7998 7999
* a_dim_0 (a_dim_0) int64 0 1 2 3 4 5 6 7 8 9 ... 22 23 24 25 26 27 28 29 30
* b_dim_0 (b_dim_0) int64 0 1 2 3 4 5 6 7 8 9 ... 22 23 24 25 26 27 28 29 30
Data variables:
mu_a (chain, draw) float64 0.8377 0.8338 0.839 ... 0.8477 0.8388 0.8401
mu_b (chain, draw) float64 -0.03049 -0.04328 ... -0.03256 -0.03794
a (chain, draw, a_dim_0) float64 0.796 0.819 0.85 ... 0.7885 0.8923
b (chain, draw, b_dim_0) float64 -0.04281 -0.03986 ... 0.0009009
sigma_a (chain, draw) float64 0.03069 0.03164 0.03194 ... 0.04317 0.03777
sigma_b (chain, draw) float64 0.0398 0.03874 0.02839 ... 0.03045 0.05148
sigma_y (chain, draw) float64 0.3748 0.3743 0.372 ... 0.369 0.3745 0.3686
Attributes:
created_at: 2022-12-06T17:59:44.518942
arviz_version: 0.12.1
inference_library: pymc
inference_library_version: 4.2.2
sampling_time: 213.41605377197266
tuning_steps: 1000<xarray.Dataset>
Dimensions: (chain: 4, draw: 8000, y_like_dim_0: 22463)
Coordinates:
* chain (chain) int64 0 1 2 3
* draw (draw) int64 0 1 2 3 4 5 6 ... 7994 7995 7996 7997 7998 7999
* y_like_dim_0 (y_like_dim_0) int64 0 1 2 3 4 ... 22459 22460 22461 22462
Data variables:
y_like (chain, draw, y_like_dim_0) float64 -2.374 -2.62 ... -0.07564
Attributes:
created_at: 2022-12-06T18:00:08.481877
arviz_version: 0.12.1
inference_library: pymc
inference_library_version: 4.2.2<xarray.Dataset>
Dimensions: (chain: 4, draw: 8000)
Coordinates:
* chain (chain) int64 0 1 2 3
* draw (draw) int64 0 1 2 3 4 5 ... 7995 7996 7997 7998 7999
Data variables: (12/16)
tree_depth (chain, draw) int64 4 5 5 4 4 4 4 4 ... 4 4 4 4 4 4 4 4
process_time_diff (chain, draw) float64 0.01416 0.02832 ... 0.01868
energy_error (chain, draw) float64 -0.0115 -0.1693 ... 0.2123
perf_counter_start (chain, draw) float64 3.236e+05 3.236e+05 ... 3.238e+05
max_energy_error (chain, draw) float64 -0.3379 -0.3193 ... 0.2123
smallest_eigval (chain, draw) float64 nan nan nan nan ... nan nan nan
... ...
perf_counter_diff (chain, draw) float64 0.01416 0.02832 ... 0.01868
n_steps (chain, draw) float64 15.0 31.0 31.0 ... 15.0 15.0 15.0
lp (chain, draw) float64 -9.559e+03 ... -9.566e+03
step_size (chain, draw) float64 0.234 0.234 ... 0.2552 0.2552
step_size_bar (chain, draw) float64 0.2279 0.2279 ... 0.2785 0.2785
energy (chain, draw) float64 9.59e+03 9.591e+03 ... 9.596e+03
Attributes:
created_at: 2022-12-06T17:59:44.532085
arviz_version: 0.12.1
inference_library: pymc
inference_library_version: 4.2.2
sampling_time: 213.41605377197266
tuning_steps: 1000<xarray.Dataset>
Dimensions: (y_like_dim_0: 22463)
Coordinates:
* y_like_dim_0 (y_like_dim_0) int64 0 1 2 3 4 ... 22459 22460 22461 22462
Data variables:
y_like (y_like_dim_0) float64 0.0 0.0 0.0 1.0 0.0 ... 1.0 1.0 1.0 1.0
Attributes:
created_at: 2022-12-06T18:00:08.498836
arviz_version: 0.12.1
inference_library: pymc
  inference_library_version: 4.2.2
# County predictions
# Posterior-mean regression line for each judicial circuit: x=0 is the
# baseline (White) release rate, x=1 adds the circuit's race slope.
xvals = np.arange(2)
# Posterior means of the intercepts and slopes: average over draws, then chains.
intercepts = varying_intercept_slope_trace.posterior.a.mean(axis=1).mean(axis=0).values
slopes = varying_intercept_slope_trace.posterior.b.mean(axis=1).mean(axis=0).values
plt.figure(figsize=(8, 10))
for intercept, slope in zip(intercepts, slopes):
    plt.plot(xvals, slope * xvals + intercept, 'bo-', alpha=0.4)
plt.xlim(-0.1, 1.1)
plt.xlabel("Black Indicator")
plt.ylabel("Proportion released pretrial per judicial circuit")
plt.savefig("District_outcomes_for_black_defendants.png");
# Re-load the raw data at locality (city/county) granularity for a
# finer-grained version of the varying-intercept, varying-slope model.
localities_cols=['Defendant_Race', 'WhetherDefendantWasReleasedPretrial','Locality_Name']
# NOTE(review): hard-coded home-directory path to the full dataset -- not
# portable; confirm file location before re-running.
locality_df=pd.read_csv("~/MSDS/Pretrial Detention Capstone/October 2017 Cohort_Virginia Pretrial Data Project_Deidentified FINAL Update_10272021.csv", usecols=localities_cols)
# Same filters as the circuit-level analysis: White/Black defendants only,
# non-blank locality name, release status not coded 9 (unknown).
locality_df=locality_df.query("((Defendant_Race=='W')|(Defendant_Race=='B'))")
locality_df=locality_df.query("Locality_Name!=' '")
locality_df=locality_df.query("WhetherDefendantWasReleasedPretrial!=9")
# NOTE(review): boolean dtype here, unlike the .astype(int) cast used for
# minimal_df earlier; arithmetic still works but the dtypes differ.
locality_df['Indicator_Black']=(locality_df['Defendant_Race']=='B')
# lookup table (dict) for each county
localities=locality_df.Locality_Name.unique()
num_local=locality_df.Locality_Name.nunique()
local_lookup=dict(zip(localities, range(num_local)))
locality_df['Locality_Number']=locality_df.Locality_Name.map(local_lookup)
# NOTE(review): `localities` is rebound from the list of names to the
# per-row integer index array consumed by the model below.
localities=locality_df['Locality_Number'].values
# Locality-level varying-intercept, varying-slope model: same structure as
# the circuit-level model, but with one intercept/slope pair per locality.
with pm.Model() as localities_model:
    # Priors
    mu_a = pm.Normal('mu_a', mu=0., sigma=1e5)
    sigma_a = pm.Exponential("sigma_a", 0.5)
    mu_b = pm.Normal('mu_b', mu=0., sigma=1e5)
    sigma_b = pm.HalfCauchy('sigma_b', 5)
    #sigma_b = pm.Exponential("sigma_b", .05)
    # Random intercepts
    a = pm.Normal('a', mu=mu_a, sigma=sigma_a, shape=num_local)
    # Random slopes
    b = pm.Normal('b', mu=mu_b, sigma=sigma_b, shape=num_local)
    # Model error
    sigma_y = pm.Uniform('sigma_y', lower=0, upper=100)
    # Expected value
    y_hat = a[localities] + b[localities] * locality_df['Indicator_Black'].values
    # Data likelihood
    # NOTE(review): Normal likelihood on a 0/1 response, as in the models above.
    y_like = pm.Normal('y_like', mu=y_hat, sigma=sigma_y, observed=locality_df['WhetherDefendantWasReleasedPretrial'].values)
    step = pm.NUTS(target_accept=0.9)
    # NOTE(review): this rebinds `varying_intercept_slope_trace`, clobbering
    # the circuit-level trace; all later cells refer to this locality-level fit.
    varying_intercept_slope_trace=pm.sample(8000, step = step, random_seed = SEED)
Multiprocess sampling (4 chains in 4 jobs) NUTS: [mu_a, sigma_a, mu_b, sigma_b, a, b, sigma_y]
Sampling 4 chains for 1_000 tune and 8_000 draw iterations (4_000 + 32_000 draws total) took 309 seconds.
pm.model_to_graphviz(localities_model)
with localities_model:
    pm.plot_trace(varying_intercept_slope_trace)
az.plot_forest(varying_intercept_slope_trace);
locality_summary=az.summary(varying_intercept_slope_trace)
# Parameters whose posterior mean is below -0.056 -- in this fit, only race
# slopes b[...] qualify: the ~10 localities with the largest estimated
# White-Black release gaps, sorted most negative first.
locality_summary.loc[locality_summary['mean']< -.056].sort_values('mean')
| mean | sd | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| b[1] | -0.112 | 0.033 | -0.176 | -0.053 | 0.001 | 0.0 | 2239.0 | 2769.0 | 1.0 |
| b[13] | -0.087 | 0.043 | -0.170 | -0.011 | 0.001 | 0.0 | 4221.0 | 11471.0 | 1.0 |
| b[71] | -0.080 | 0.040 | -0.155 | -0.009 | 0.001 | 0.0 | 5118.0 | 12015.0 | 1.0 |
| b[67] | -0.070 | 0.040 | -0.150 | -0.001 | 0.000 | 0.0 | 7767.0 | 14886.0 | 1.0 |
| b[15] | -0.063 | 0.026 | -0.113 | -0.014 | 0.000 | 0.0 | 11492.0 | 18885.0 | 1.0 |
| b[80] | -0.061 | 0.039 | -0.136 | 0.010 | 0.000 | 0.0 | 11217.0 | 13927.0 | 1.0 |
| b[61] | -0.060 | 0.039 | -0.137 | 0.012 | 0.000 | 0.0 | 12038.0 | 12130.0 | 1.0 |
| b[92] | -0.060 | 0.037 | -0.132 | 0.008 | 0.000 | 0.0 | 11634.0 | 15263.0 | 1.0 |
| b[35] | -0.057 | 0.022 | -0.099 | -0.018 | 0.000 | 0.0 | 12737.0 | 19083.0 | 1.0 |
| b[91] | -0.057 | 0.039 | -0.130 | 0.016 | 0.000 | 0.0 | 14945.0 | 13087.0 | 1.0 |
# Map the flagged slope parameters back to locality names.
# Fixes: use a raw string for the regex (the original "(\d+)" is an invalid
# escape sequence in a plain string) and anchor the pattern to slope rows
# "b[<idx>]" so a non-slope parameter name (e.g. "mu_b", which has no
# digits and would produce NaN -> astype(int) crash) can never slip in.
top_diff = (locality_summary.loc[locality_summary['mean'] < -.056]
            .sort_values('mean')
            .index.str.extract(r"b\[(\d+)\]")
            .dropna()
            .values.astype(int))
[key for key, val in local_lookup.items() if val in top_diff]
['Arlington County', 'Greensville County', 'Loudoun County', 'Richmond City', 'Lunenburg County', 'Warren County', 'Accomack County', 'Bristol City', 'Southampton County', 'Brunswick County']
# Parameters with positive posterior-mean slope (localities where Black
# defendants have a higher estimated release rate). Raw string fixes the
# invalid "\[" escape warning in the original pattern.
locality_summary.loc[locality_summary['mean']>0].sort_values('mean').index.str.extract(r"b\[(\d+)")
#[key for key, val in local_lookup.items() if val in pos_slope]
| 0 | |
|---|---|
| 0 | True |
| 1 | True |
| 2 | False |
| 3 | False |
| 4 | False |
| ... | ... |
| 127 | False |
| 128 | False |
| 129 | False |
| 130 | False |
| 131 | False |
132 rows × 1 columns
#localities predictions
# Same posterior-mean regression-line plot as the circuit version above,
# now with one line per locality: x=0 is the White baseline, x=1 adds the
# locality's race slope.
xvals = np.arange(2)
intercepts = varying_intercept_slope_trace.posterior.a.mean(axis=1).mean(axis=0).values
slopes = varying_intercept_slope_trace.posterior.b.mean(axis=1).mean(axis=0).values
plt.figure(figsize=(8, 10))
for intercept, slope in zip(intercepts, slopes):
    plt.plot(xvals, slope * xvals + intercept, 'bo-', alpha=0.4)
plt.xlim(-0.1, 1.1)
plt.xlabel("Black Indicator")
plt.ylabel("Proportion released pretrial per locality")
plt.savefig("Localities_outcomes_for_black_defendants.png");